To see the code, click on the CODE button. You can also download the whole R Markdown file from the drop down menu on the top right corner.
library(tidyverse)
library(ggtext)
library(patchwork)
library(readxl)
library(nullabor)
library(here)
library(janitor)
library(scales)
#theme_set(theme_classic())
knitr::opts_chunk$set(fig.path = "images/",
dev = c("png", "pdf", "svg"))
df_full <- read_xlsx(here("data/MaskedCoverage-Fig3.xlsx")) %>%
clean_names() %>%
add_row(state = c("OR", "WY", "SD", "WV", "DC", "AL")) %>%
mutate(row = case_when(
state %in% c("ME") ~ 1L,
state %in% c("VT", "NH") ~ 2L,
state %in% c("WA", "ID", "MT", "ND", "MN", "IL", "WI", "MI", "NY", "RI", "MA") ~ 3L,
state %in% c("OR", "NV", "WY", "SD", "IA", "IN", "OH", "PA", "NJ", "CT") ~ 4L,
state %in% c("CA", "UT", "CO", "NE", "MO", "KY", "WV", "VA", "MD", "DE") ~ 5L,
state %in% c("AZ", "NM", "KS", "AR", "TN", "NC", "SC", "DC") ~ 6L,
state %in% c("OK", "LA", "MS", "AL", "GA") ~ 7L,
state %in% c("TX", "FL") ~ 8L,
TRUE ~ 0L),
col = case_when(
state %in% c("WA", "OR", "CA") ~ 1L,
state %in% c("ID", "NV", "UT", "AZ") ~ 2L,
state %in% c("MT", "WY", "CO", "NM") ~ 3L,
state %in% c("ND", "SD", "NE", "KS", "OK", "TX") ~ 4L,
state %in% c("MN", "IA", "MO", "AR", "LA") ~ 5L,
state %in% c("IL", "IN", "KY", "TN", "MS") ~ 6L,
state %in% c("WI", "OH", "WV", "NC", "AL") ~ 7L,
state %in% c("MI", "PA", "VA", "SC", "GA") ~ 8L,
state %in% c("NY", "NJ", "MD", "DC", "FL") ~ 9L,
state %in% c("VT", "RI", "CT", "DE") ~ 10L,
state %in% c("ME", "NH", "MA") ~ 11L,
TRUE ~ 0L
))
df_miss <- df_full %>%
filter(!is.na(readmission_rate))
g1 <- ggplot(df_miss, aes(col, row)) +
geom_point(aes(size = coverage_obscured, color = readmission_rate * 100), alpha = 0.8) +
geom_text(data = df_full, aes(label = state), color = "black", nudge_y = 0.05) +
geom_text(aes(label = percent(readmission_rate, 0.01)), nudge_y = -0.1, size = 2.5) +
theme_void() +
scale_color_gradient2(low = "#3F6E9A", high = "#AB4C30", midpoint = median(df_miss$readmission_rate * 100), mid = "#E7D9C6") +
scale_size(range = c(3, 30)) +
scale_y_reverse() +
theme(plot.margin = margin(r = 30)) +
labs(color = "Readmission",
size = "Coverage")
g2 <- ggplot(df_miss, aes(col, row)) +
geom_point(aes(size = coverage_obscured, color = colorectal_cancer_screenings), alpha = 0.8) +
geom_text(data = df_full, aes(label = state), color = "black", nudge_y = 0.05) +
geom_text(aes(label = percent(colorectal_cancer_screenings/100, 0.01)), nudge_y = -0.1, size = 2.5) +
theme_void() +
scale_color_gradient2(low = "#3F6E9A", high = "#AB4C30", midpoint = median(df_miss$colorectal_cancer_screenings), mid = "#E7D9C6") +
scale_size(range = c(3, 30)) +
scale_y_reverse() +
labs(color = "Cancer Screening",
size = "Coverage")
g1 + g2 + plot_layout(guides = "collect")
Figure S1: This figure recreates Figure 3 in Basole et al. (2021).
theme_set(theme_classic())
g1 <- ggplot(df_miss, aes(coverage_obscured * 100, colorectal_cancer_screenings)) +
geom_point() +
labs(x = "Coverage (%)", y = "Cancer Screening (%)") +
geom_smooth(method = loess, formula = y ~ x) +
annotate("richtext", x = 80, y = 73, label.color = NA, fill = "transparent", label = glue::glue("R<sup>2</sup> = {scales::comma(cor(df_miss$coverage_obscured, df_miss$colorectal_cancer_screenings), 0.001)}"))
g2 <- ggplot(df_miss, aes(coverage_obscured * 100, readmission_rate * 100)) +
geom_point() +
labs(x = "Coverage (%)", y = "Readmission (%)") +
geom_smooth(method = loess, formula = y ~ x) +
annotate("richtext", x = 80, y = 15.3, label.color = NA, fill = "transparent", label = glue::glue("R<sup>2</sup> = {scales::comma(cor(df_miss$coverage_obscured, df_miss$readmission_rate), 0.001)}"))
g1 + g2
Figure S2: This is an alternative graph design for Figure 1.
set.seed(2021)
lineup_data <- null_permute("colorectal_cancer_screenings") %>%
lineup(true = df_miss, n = 20, pos = 3)
plot_lineup_theirs <- ggplot(lineup_data, aes(col, row)) +
geom_point(aes(size = coverage_obscured, color = colorectal_cancer_screenings), alpha = 0.8) +
theme_void() +
scale_color_gradient2(low = "#3F6E9A", high = "#AB4C30", midpoint = median(df_miss$colorectal_cancer_screenings), mid = "#E7D9C6") +
scale_size(range = c(1, 5)) +
scale_y_reverse(expand = c(0.1, 0.2)) +
guides(color = "none", size = "none") +
facet_wrap(~.sample, ncol = 5) +
scale_x_continuous(expand = c(0.1, 0.1)) +
theme(legend.position = "bottom",
strip.text = element_text(size = 18, margin = margin(t = 3, b = 3)),
strip.background = element_rect(color = "black", size = 1.5))
plot_lineup_theirs
Figure S3: The lineup for the tile grid plot.
plot_lineup_ours <- ggplot(lineup_data, aes(coverage_obscured * 100, colorectal_cancer_screenings)) +
geom_point() +
geom_smooth(method = loess, formula = y ~ x) +
facet_wrap(~.sample, ncol = 5) +
scale_x_continuous(expand = c(0.1, 0.1)) +
theme(legend.position = "bottom",
strip.text = element_text(size = 18, margin = margin(t = 3, b = 3)),
strip.background = element_rect(color = "black", size = 1.5),
axis.text = element_blank(),
axis.title = element_blank(),
axis.line = element_blank(),
axis.ticks.length = unit(0, "pt"))
plot_lineup_ours
Figure S4: The lineup for the scatter plot.
The following are plots based on data that purposely modifies cancer screening to induce a higher association with the coverage.
df_false <- df_miss %>%
arrange(coverage_obscured) %>%
mutate(colorectal_cancer_screenings = sort(colorectal_cancer_screenings))
lineup_false_data <- null_permute("colorectal_cancer_screenings") %>%
lineup(true = df_false, n = 20, pos = 5)
plot_lineup_theirs %+% lineup_false_data
Figure S5: Which plot looks the most strikingly different to you?
plot_lineup_ours %+% lineup_false_data
Figure S6: The above shows a lineup for data that was purposely manipulated so that two variables have a higher association. How easy was it to spot the data plot compared to Figure 5?
We thank Basole et al. (2021) for supplying us the synthetic data to draw the above plots.
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.0.1 (2020-06-06)
## os macOS 10.16
## system x86_64, darwin17.0
## ui X11
## language (EN)
## collate en_AU.UTF-8
## ctype en_AU.UTF-8
## tz Australia/Melbourne
## date 2021-09-19
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib source
## assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.0.0)
## backports 1.2.1 2020-12-09 [1] CRAN (R 4.0.2)
## bookdown 0.22.17 2021-08-07 [1] Github (rstudio/bookdown@9615b14)
## broom 0.7.9 2021-07-27 [1] CRAN (R 4.0.2)
## bslib 0.2.5 2021-05-12 [1] CRAN (R 4.0.1)
## cellranger 1.1.0 2016-07-27 [2] CRAN (R 4.0.0)
## class 7.3-19 2021-05-03 [2] CRAN (R 4.0.2)
## cli 3.0.1 2021-07-17 [1] CRAN (R 4.0.2)
## cluster 2.1.2 2021-04-17 [2] CRAN (R 4.0.2)
## colorspace 2.0-1 2021-05-04 [1] CRAN (R 4.0.2)
## crayon 1.4.1 2021-02-08 [1] CRAN (R 4.0.2)
## DBI 1.1.1 2021-01-15 [1] CRAN (R 4.0.2)
## dbplyr 2.1.1 2021-04-06 [1] CRAN (R 4.0.2)
## DEoptimR 1.0-8 2016-11-19 [2] CRAN (R 4.0.0)
## digest 0.6.27 2020-10-24 [1] CRAN (R 4.0.2)
## diptest 0.76-0 2021-05-04 [2] CRAN (R 4.0.2)
## dplyr * 1.0.7 2021-06-18 [1] CRAN (R 4.0.2)
## ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.0.2)
## evaluate 0.14 2019-05-28 [2] CRAN (R 4.0.0)
## fansi 0.5.0 2021-05-25 [1] CRAN (R 4.0.2)
## farver 2.1.0 2021-02-28 [1] CRAN (R 4.0.2)
## flexmix 2.3-17 2020-10-12 [1] CRAN (R 4.0.2)
## forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.0.2)
## fpc 2.2-9 2020-12-06 [2] CRAN (R 4.0.2)
## fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
## generics 0.1.0 2020-10-31 [2] CRAN (R 4.0.2)
## ggplot2 * 3.3.3 2020-12-30 [1] CRAN (R 4.0.1)
## ggtext * 0.1.1 2020-12-17 [1] CRAN (R 4.0.2)
## glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
## gridtext 0.1.4 2020-12-10 [1] CRAN (R 4.0.2)
## gtable 0.3.0 2019-03-25 [2] CRAN (R 4.0.0)
## haven 2.4.1 2021-04-23 [2] CRAN (R 4.0.2)
## here * 1.0.1 2020-12-13 [2] CRAN (R 4.0.2)
## highr 0.9 2021-04-16 [2] CRAN (R 4.0.2)
## hms 1.1.0 2021-05-17 [1] CRAN (R 4.0.2)
## htmltools 0.5.1.1 2021-01-22 [1] CRAN (R 4.0.2)
## httr 1.4.2 2020-07-20 [1] CRAN (R 4.0.2)
## janitor * 2.1.0 2021-01-05 [2] CRAN (R 4.0.2)
## jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.0.2)
## jsonlite 1.7.2 2020-12-09 [1] CRAN (R 4.0.2)
## kernlab 0.9-29 2019-11-12 [2] CRAN (R 4.0.0)
## knitr 1.33 2021-04-24 [1] CRAN (R 4.0.2)
## labeling 0.4.2 2020-10-20 [1] CRAN (R 4.0.2)
## lattice 0.20-44 2021-05-02 [2] CRAN (R 4.0.2)
## lifecycle 1.0.0 2021-02-15 [1] CRAN (R 4.0.2)
## lubridate 1.7.10 2021-02-26 [1] CRAN (R 4.0.2)
## magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.0.2)
## markdown 1.1 2019-08-07 [2] CRAN (R 4.0.0)
## MASS 7.3-54 2021-05-03 [1] CRAN (R 4.0.2)
## Matrix 1.3-3 2021-05-04 [2] CRAN (R 4.0.2)
## mclust 5.4.7 2020-11-20 [2] CRAN (R 4.0.2)
## mgcv 1.8-35 2021-04-18 [2] CRAN (R 4.0.2)
## modelr 0.1.8 2020-05-19 [2] CRAN (R 4.0.0)
## modeltools 0.2-23 2020-03-05 [2] CRAN (R 4.0.0)
## moments 0.14 2015-01-05 [2] CRAN (R 4.0.0)
## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.0.0)
## nlme 3.1-152 2021-02-04 [2] CRAN (R 4.0.2)
## nnet 7.3-16 2021-05-03 [2] CRAN (R 4.0.2)
## nullabor * 0.3.9 2020-02-25 [1] CRAN (R 4.0.2)
## patchwork * 1.1.1 2020-12-17 [1] CRAN (R 4.0.2)
## pillar 1.6.2 2021-07-29 [1] CRAN (R 4.0.2)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.0.0)
## prabclus 2.3-2 2020-01-08 [2] CRAN (R 4.0.0)
## purrr * 0.3.4 2020-04-17 [2] CRAN (R 4.0.0)
## R6 2.5.1 2021-08-19 [1] CRAN (R 4.0.1)
## Rcpp 1.0.7 2021-07-07 [1] CRAN (R 4.0.2)
## readr * 2.0.1 2021-08-10 [1] CRAN (R 4.0.2)
## readxl * 1.3.1 2019-03-13 [2] CRAN (R 4.0.0)
## reprex 2.0.0 2021-04-02 [1] CRAN (R 4.0.2)
## rlang 0.4.11 2021-04-30 [1] CRAN (R 4.0.2)
## rmarkdown 2.10 2021-08-06 [1] CRAN (R 4.0.1)
## robustbase 0.93-7 2021-01-04 [2] CRAN (R 4.0.2)
## rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.0.2)
## rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.0.1)
## rvest 1.0.1 2021-07-26 [1] CRAN (R 4.0.2)
## sass 0.4.0 2021-05-12 [1] CRAN (R 4.0.2)
## scales * 1.1.1 2020-05-11 [2] CRAN (R 4.0.0)
## sessioninfo 1.1.1 2018-11-05 [2] CRAN (R 4.0.0)
## snakecase 0.11.0 2019-05-25 [2] CRAN (R 4.0.0)
## stringi 1.7.3 2021-07-16 [1] CRAN (R 4.0.2)
## stringr * 1.4.0 2019-02-10 [2] CRAN (R 4.0.0)
## tibble * 3.1.3 2021-07-23 [1] CRAN (R 4.0.2)
## tidyr * 1.1.3 2021-03-03 [1] CRAN (R 4.0.2)
## tidyselect 1.1.1 2021-04-30 [1] CRAN (R 4.0.2)
## tidyverse * 1.3.1 2021-04-15 [1] CRAN (R 4.0.2)
## tzdb 0.1.2 2021-07-20 [1] CRAN (R 4.0.2)
## utf8 1.2.2 2021-07-24 [1] CRAN (R 4.0.2)
## vctrs 0.3.8 2021-04-29 [1] CRAN (R 4.0.2)
## withr 2.4.2 2021-04-18 [1] CRAN (R 4.0.2)
## xfun 0.24 2021-06-15 [1] CRAN (R 4.0.2)
## xml2 1.3.2 2020-04-23 [2] CRAN (R 4.0.0)
## yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.2)
##
## [1] /Users/etan0038/Library/R/4.0/library
## [2] /Library/Frameworks/R.framework/Versions/4.0/Resources/library